# Unique child identifiers appearing in the assessment panel.
KID <- assessment_panel %>%
  select(KID) %>%
  unique()

# Childbirth file, restricted to women with usable records.
child_record <- read.csv("../../../data/data_PSID/main/childbirth/Childbirth.csv") %>%
  mutate(X = NULL) %>% # remove column X
  filter(
    CAH5 == 2,    # keep only women
    CAH10 != 0,   # drop women who have no children according to this file
    CAH7 != 9998  # drop women with missing birth year info
  ) %>%
  mutate(
    MID = CAH3 * 1000 + CAH4,   # mother identifier
    KID = CAH10 * 1000 + CAH11  # child identifier
  ) %>%
  group_by(MID) %>%
  filter(!any(CAH10 == 9999)) # drop all records of a mother if any has CAH10 == 9999

# Mothers with at least one child among the KID identifiers.
mother_index <- child_record %>%
  select(MID, KID) %>%
  inner_join(KID) %>%
  select(MID) %>%
  unique()

# Full childbirth records (all children) of those mothers.
C <- child_record %>%
  inner_join(mother_index)
# Build a mother-by-year panel of child-composition variables.
#
# Arguments:
#   d          Childbirth records for a single mother, one row per child.
#              Columns read: MID, CAH7 (mother's birth year; first row used),
#              CAH108 (first row, passed through as `knum`), CAH15 (child's
#              birth year).
#   last_year  Final calendar year of the panel. Defaults to 2017, the value
#              that used to be hard-coded.
#
# Returns a data.frame with one row per calendar year from the year the
# mother turns 16 through `last_year`, with columns:
#   MID, m_age, year, knum,
#   num_child     number of children aged 0-18 in that year,
#   age_youngest  age of the youngest such child (Inf when there is none),
#   age_oldest    age of the oldest such child (-1 when there is none),
#   num_0_5       number of children aged 0-5,
#   num_6_12      number of children aged 6-12,
#   y_first       CAH15 of the first row of `d`.
# When the mother turns 16 after `last_year` the result has zero rows.
MakePanel <- function(d, last_year = 2017) {
  birth_year <- d$CAH7[1]
  first_year <- birth_year + 16

  # `a:b` counts DOWN when a > b, which used to fabricate panel years in which
  # the mother is younger than 16; return an empty panel in that case instead.
  year <- if (first_year <= last_year) first_year:last_year else integer(0)
  n_y <- length(year)
  child_birth <- d$CAH15

  age_youngest <- numeric(n_y)
  age_oldest <- numeric(n_y)
  num_0_5 <- numeric(n_y)
  num_6_12 <- numeric(n_y)
  num_child <- numeric(n_y)

  for (i in seq_len(n_y)) {
    ages <- year[i] - child_birth
    # Children not yet born have negative ages and are skipped; which() also
    # skips children whose birth year is NA.
    at_home <- ages[which(ages >= 0 & ages <= 18)]

    num_child[i] <- length(at_home)
    # Appending the sentinel keeps min()/max() defined when no child is 0-18.
    age_youngest[i] <- min(at_home, Inf)
    age_oldest[i] <- max(at_home, -1)
    num_0_5[i] <- sum(at_home <= 5)
    num_6_12[i] <- sum(at_home > 5 & at_home <= 12)
  }

  # Scalars are repeated explicitly so the zero-row case builds cleanly.
  data.frame(
    MID = rep(d$MID[1], n_y),
    m_age = year - birth_year,
    year = year,
    knum = rep(d$CAH108[1], n_y),
    num_child = num_child,
    age_youngest = age_youngest,
    age_oldest = age_oldest,
    num_0_5 = num_0_5,
    num_6_12 = num_6_12,
    y_first = rep(d$CAH15[1], n_y)
  )
}
# One yearly panel per mother (built by MakePanel), keeping mothers whose
# y_first is 1960 or later.
mother_panel <- C %>%
  group_by(MID) %>%
  do(MakePanel(.)) %>%
  filter(y_first >= 1960) %>%
  left_join(passage_comprehension) # add mother's cognitive score

# Individual file: build the person identifier and a relationship flag.
Ind <- identifiers_panel %>%
  mutate(
    MID = intnum68 * 1000 + pernum,
    mar_stat = mpair > 0
  )

# Copy every record from 1999 onwards to the preceding year and stack the
# copies on top of the original records.
Ind <- Ind %>%
  filter(year >= 1999) %>%
  mutate(year = year - 1) %>%
  rbind(Ind)

# This will be useful throughout: a data frame that maps mothers to their
# interview number and sequence number.
D_Ind <- mother_panel %>%
  inner_join(Ind)
# Childcare expenditure from the main interview, keyed by mother and lag year.
main_childcare <- main_childcare %>%
  inner_join(D_Ind) %>%            # merge to mothers through the panel file
  mutate(year = year - 1) %>%      # convert survey year to lag year
  select(MID, year, childcare_exp)

# Attach state of residence, FIPS/SOI codes and childcare expenditure.
mother_panel <- main_state %>%
  right_join(Ind) %>%                       # merge with the individual file
  right_join(mother_panel) %>%              # merge with our panel, keeping all its years
  arrange(MID, year) %>%
  group_by(MID) %>%
  fill(state, .direction = "downup") %>%    # fill in missing observations within mother
  merge(state_codes) %>%                    # cross-walk with FIPS codes and SOI codes
  left_join(main_childcare)                 # use only the main file to link

# Identify spouses: individuals in Ind that do not match a mother-year
# (m_age missing after the merge) and who are in a relationship (mpair > 0).
mother_panel <- mother_panel %>%
  select(MID, year, m_age) %>%
  right_join(Ind) %>%                          # keep all observations from Ind
  filter(is.na(m_age), mpair > 0) %>%
  rename(FID = MID, snF = sn) %>%              # rename variables for the merge
  select(FID, year, intnum, mpair, snF) %>%
  right_join(mother_panel) %>%                 # attach spouses to the mother panel
  mutate(snF = replace_na(snF, -1))            # -1 marks "no spouse record"
# Miles to parents in 1988, keyed by the 1988 family interview number.
d <- parent_distance %>%
  rename(intnum88 = intnum)

# 1988 records of the individual file: person id, sequence number and the
# 1988 interview number, used to match both mothers and fathers.
id <- Ind %>%
  filter(year == 1988) %>%
  rename(id = MID, intnum88 = intnum) %>%
  select(id, sn, intnum88)

# Mothers: take MID from the main sample and assign the distance by family
# interview number and sequence number (sn 1 -> head_* columns, sn 2 -> sp_*).
mother_distance <- mother_panel %>%
  select(MID) %>%
  unique() %>%
  merge(id, by.x = "MID", by.y = "id") %>%
  merge(d) %>%
  mutate(
    m_parents_hundred = case_when(
      sn == 1 ~ head_hundred_miles,
      sn == 2 ~ sp_hundred_miles
    ),
    m_parents_ten = case_when(
      sn == 1 ~ head_ten_miles,
      sn == 2 ~ sp_ten_miles
    )
  ) %>%
  select(MID, m_parents_hundred, m_parents_ten)

# Fathers: same assignment, starting from the non-missing FIDs.
father_distance <- mother_panel %>%
  select(FID) %>%
  drop_na() %>%
  unique() %>%
  merge(id, by.x = "FID", by.y = "id") %>%
  merge(d) %>%
  mutate(
    f_parents_hundred = case_when(
      sn == 1 ~ head_hundred_miles,
      sn == 2 ~ sp_hundred_miles
    ),
    f_parents_ten = case_when(
      sn == 1 ~ head_ten_miles,
      sn == 2 ~ sp_ten_miles
    )
  ) %>%
  select(FID, f_parents_hundred, f_parents_ten)
# ---- Load marriage info and create some summary variables ----
M <- read.csv("../../../data/data_PSID/main/marriage/Marriage.csv") %>%
  mutate(X = NULL) %>%
  # ID1 becomes MID in preparation for a first merge with sample mothers.
  rename(MID = ID1, ybirth = MH6, ymar = MH11) %>%
  filter(
    ybirth < 9998, # drop individuals with missing birth info
    ymar != 9998   # remove individuals without marriage info
  ) %>%
  mutate(record = ymar < 9999) %>% # ymar == 9999 indicates never married
  group_by(MID) %>%
  summarise(ever_married = sum(record) > 0, ybirth = ybirth[1])

# Keep mothers that have a marriage-file summary.
mother_panel <- M %>%
  inner_join(mother_panel)

# Spouse records: individuals in Ind that were *not* matched to a mother-year
# and have mpair > 0, attached to the mother panel.
mother_panel <- D_Ind %>%
  select(MID, year, m_age) %>%
  right_join(Ind) %>%
  filter(is.na(m_age)) %>%
  rename(FID = MID, snF = sn) %>%
  select(FID, year, intnum, mpair, snF) %>%
  filter(mpair > 0) %>%
  right_join(mother_panel)

# Spouse marriage summary and age, matched on FID.
mother_panel <- M %>%
  rename(FID = MID, ybirthF = ybirth) %>%
  right_join(mother_panel) %>%
  mutate(f_age = year - ybirthF)
# CPI-U rescaled so that the value in row 56 of the file equals 1.
# NOTE(review): the original comment says "normalized to year 2002", which
# assumes row 56 of CPI-U.csv is the 2002 entry - confirm against the file.
cpi <- read.csv("../../../data/data_PSID/CPI-U.csv") %>%
  rename(year = YEAR) %>%
  mutate(CPIU = CPIU / CPIU[56])

# Attach education and race for mother and spouse, then the price index.
# The inner join on mother's education drops mothers without an education
# record; everything afterwards is a left join.
mother_panel <- mother_panel %>%
  inner_join(education, by = c("MID" = "ID")) %>%
  rename(m_ed = ed) %>%
  left_join(education, by = c("FID" = "ID")) %>%
  rename(f_ed = ed) %>%
  left_join(race, by = c("MID" = "ID")) %>%
  rename(m_race = Race) %>%
  left_join(race, by = c("FID" = "ID")) %>%
  rename(f_race = Race) %>%
  left_join(cpi)
# Earnings and hours from the main interview, assigned to mother and spouse
# by sequence number (1 -> head columns, 2 -> spouse columns).
earnings_panel <- mother_panel %>%
  select(MID, FID, year, intnum, sn, snF) %>%
  inner_join(earnings_panel) %>% # merge based on year and interview number
  mutate(
    m_hrs = case_when(sn == 1 ~ hours_head, sn == 2 ~ hours_spouse),
    m_earn = case_when(sn == 1 ~ earn_head, sn == 2 ~ earn_spouse),
    f_hrs = case_when(snF == 1 ~ hours_head, snF == 2 ~ hours_spouse),
    f_earn = case_when(snF == 1 ~ earn_head, snF == 2 ~ earn_spouse),
    m_wage = m_earn / m_hrs,
    f_wage = f_earn / f_hrs
  ) %>%
  # FID is not kept: the coupling (MID, FID) is dynamic and may not match the
  # pair from the previous year when these data are merged back in.
  select(MID, year, m_earn, m_hrs, m_wage, f_earn, f_hrs, f_wage) %>%
  mutate(year = year - 1) # these variables refer to the previous year

# Supplementary earnings, linked first to mothers ...
T2m <- mother_panel %>%
  select(MID) %>%
  unique() %>%
  inner_join(earnings_panel_supplement) %>% # pick out mothers in the sample
  rename(m_earn = earn, m_hrs = hrs) %>%
  mutate(m_wage = m_earn / m_hrs) %>%
  select(MID, year, m_earn, m_wage, m_hrs)

# ... and then to "fathers" (all spouses are labelled fathers).
T2f <- mother_panel %>%
  select(FID) %>%
  unique() %>%
  inner_join(earnings_panel_supplement) %>%
  rename(f_earn = earn, f_hrs = hrs) %>%
  mutate(f_wage = f_earn / f_hrs) %>%
  select(FID, year, f_earn, f_wage, f_hrs)

mother_panel <- mother_panel %>%
  left_join(total_income)

# Link (MID, FID, year) to the mother and father supplements, append the
# main-interview earnings, and merge everything back onto the panel.
mother_panel <- mother_panel %>%
  select(MID, FID, year) %>%
  inner_join(T2m) %>%
  left_join(T2f) %>%
  select(-FID) %>%
  rbind(earnings_panel) %>%
  mutate(
    # household earnings: mother plus spouse when the spouse's are observed
    house_earn = case_when(!is.na(f_earn) ~ f_earn + m_earn, TRUE ~ m_earn),
    # wages of Inf or 0 are set to missing
    m_wage = na_if(na_if(m_wage, Inf), 0),
    f_wage = na_if(na_if(f_wage, Inf), 0)
  ) %>%
  right_join(mother_panel)
# Load occupation data and assign occupation to wife and husband by
# interview number and sequence number.
occupations <- read.csv("../../../data/data_PSID/main/occupation/output/occ.csv")

mother_panel <- mother_panel %>%
  left_join(occupations) %>%
  mutate(
    m_occ_major = case_when(sn == 1 ~ occ_major_h, sn == 2 ~ occ_major_s),
    m_occ_minor = case_when(sn == 1 ~ occ_minor_h, sn == 2 ~ occ_minor_s),
    f_occ_major = case_when(snF == 1 ~ occ_major_h, snF == 2 ~ occ_major_s),
    f_occ_minor = case_when(snF == 1 ~ occ_minor_h, snF == 2 ~ occ_minor_s)
  )

mother_panel <- mother_panel %>%
  mutate(
    # children's ages are missing when there are no children
    age_oldest = replace(age_oldest, which(num_child == 0), NA_real_),
    age_youngest = replace(age_youngest, which(num_child == 0), NA_real_),
    # negative earnings and wages are set to missing
    m_earn = replace(m_earn, which(m_earn < 0), NA_real_),
    f_earn = replace(f_earn, which(f_earn < 0), NA_real_),
    m_wage = replace(m_wage, which(m_wage < 0), NA_real_),
    f_wage = replace(f_wage, which(f_wage < 0), NA_real_),
    house_earn = replace(house_earn, which(house_earn < 0), NA_real_)
  )
mother_panel <- mother_panel %>%
  mutate(
    # 0/1 indicator for current marital status
    curr_married = case_when(
      mar_stat == "TRUE" ~ 1,
      mar_stat == "FALSE" ~ 0
    ),

    # race indicators, mothers
    m_white = ifelse(m_race == 1, 1, 0),
    m_black = ifelse(m_race == 2, 1, 0),
    m_r_oth = ifelse(m_race > 2, 1, 0),

    # race indicators, fathers
    f_white = ifelse(f_race == 1, 1, 0),
    f_black = ifelse(f_race == 2, 1, 0),
    f_r_oth = ifelse(f_race > 2, 1, 0),

    # education indicators, fathers
    fed_hsd = ifelse(f_ed == "<12", 1, 0),
    fed_hs = ifelse(f_ed == "12", 1, 0),
    fed_scoll = ifelse(f_ed == "13-15", 1, 0),
    fed_coll = ifelse(f_ed == "16", 1, 0),
    fed_postcol = ifelse(f_ed == ">16", 1, 0),
    fed_collplus = fed_coll + fed_postcol,
    fed_scollplus = fed_scoll + fed_coll + fed_postcol,

    # education indicators, mothers
    med_hsd = ifelse(m_ed == "<12", 1, 0),
    med_hs = ifelse(m_ed == "12", 1, 0),
    med_scoll = ifelse(m_ed == "13-15", 1, 0),
    med_coll = ifelse(m_ed == "16", 1, 0),
    med_postcol = ifelse(m_ed == ">16", 1, 0),
    med_collplus = med_coll + med_postcol,
    med_scollplus = med_scoll + med_coll + med_postcol,

    # sum of the 0-5 and 6-12 child counts
    num_0_12 = num_0_5 + num_6_12,

    # mother's age at the oldest child's birth
    momageatbirth1 = m_age - age_oldest,

    # potential work experience: age minus 20/22/24 for some college /
    # college / post-college, age minus 18 otherwise
    m_exper = case_when(
      med_scoll == 1 ~ m_age - 20,
      med_coll == 1 ~ m_age - 22,
      med_postcol == 1 ~ m_age - 24,
      TRUE ~ m_age - 18
    ),
    f_exper = case_when(
      fed_scoll == 1 ~ f_age - 20,
      fed_coll == 1 ~ f_age - 22,
      fed_postcol == 1 ~ f_age - 24,
      TRUE ~ f_age - 18
    ),
    m_exper2 = m_exper * m_exper,
    f_exper2 = f_exper * f_exper
  )

# Non-labour income for married and single parents, converted from annual
# to weekly by dividing by 52.
mother_panel <- mother_panel %>%
  mutate(
    non_lab_income = case_when(
      curr_married == 1 ~ total_income - m_earn - f_earn,
      curr_married == 0 ~ total_income - m_earn
    ) / 52
  )
# ind_not_sample == 1 flags observations that fail a sample restriction.
# Each step below keeps the running flag when its own test passes (or the
# tested variable is missing) and sets it to 1 otherwise.
mother_panel <- mother_panel %>%
  mutate(
    # mother is between 16 and 45 at the birth of the oldest child (or missing)
    ind_not_sample = case_when(
      momageatbirth1 >= 16 & momageatbirth1 <= 45 ~ 0,
      is.na(momageatbirth1) ~ 0,
      TRUE ~ 1
    ),
    # mother is between 18 and 65, or her age is missing
    ind_not_sample = case_when(
      m_age >= 18 & m_age <= 65 ~ ind_not_sample,
      is.na(m_age) ~ ind_not_sample,
      TRUE ~ 1
    ),
    # father is between 18 and 65, or his age is missing
    ind_not_sample = case_when(
      f_age >= 18 & f_age <= 65 ~ ind_not_sample,
      is.na(f_age) ~ ind_not_sample,
      TRUE ~ 1
    )
  )

# 1st and 99th wage percentiles by year and broad education group, computed
# on observations that remain in the sample (ind_not_sample == 0).
q_m <- mother_panel %>%
  filter(ind_not_sample == 0) %>%
  group_by(year, med_scollplus) %>%
  summarise(
    qm1 = quantile(m_wage, probs = 0.01, na.rm = TRUE),
    qm99 = quantile(m_wage, probs = 0.99, na.rm = TRUE)
  )

q_f <- mother_panel %>%
  filter(ind_not_sample == 0) %>%
  group_by(year, fed_scollplus) %>%
  summarise(
    qf1 = quantile(f_wage, probs = 0.01, na.rm = TRUE),
    qf99 = quantile(f_wage, probs = 0.99, na.rm = TRUE)
  )

# Wages outside the [1st, 99th] percentile band become missing.
mother_panel <- mother_panel %>%
  left_join(q_m) %>%
  left_join(q_f) %>%
  mutate(
    m_wage = case_when(m_wage >= qm1 & m_wage <= qm99 ~ m_wage, TRUE ~ NA_real_),
    f_wage = case_when(f_wage >= qf1 & f_wage <= qf99 ~ f_wage, TRUE ~ NA_real_)
  ) %>%
  select(-qf1, -qf99, -qm1, -qm99)
# Deflate wages, annual earnings, income and non-labour income by CPIU.
mother_panel <- mother_panel %>%
  mutate(
    m_wage = m_wage / CPIU,
    f_wage = f_wage / CPIU,
    m_earn = m_earn / CPIU,
    f_earn = f_earn / CPIU,
    total_income = total_income / CPIU,
    tax_income = tax_income / CPIU,
    non_lab_income = non_lab_income / CPIU
  )

# Log wage measures on the trimmed, deflated wages.
mother_panel <- mother_panel %>%
  mutate(
    ln_wage_m = log(m_wage),
    ln_wage_f = log(f_wage),
    ln_wage_fm_ratio = ln_wage_f - ln_wage_m
  )

# Save the mother panel in CSV and Stata formats and report its size.
write.csv(mother_panel, "../../../data/data_derived/MotherPanelCDS.csv")
write.dta(mother_panel, "../../../data/data_derived/MotherPanelCDS.dta")
nrow(mother_panel)
# Exploratory attempt at assembling the child-level panel, followed by
# row-count diagnostics.
# NOTE(review): this version merges `D`, which is not defined anywhere above
# this point in the visible script; a later copy of the same pipeline merges
# `mother_panel` instead. Presumably this attempt is superseded - confirm
# before relying on it.
child_panel <- KID %>%
inner_join(C) %>%
group_by(KID) %>%
filter(sum(CAH2 == 2) == 0) %>% # drop children with adoption records
rename(ybirth_child = CAH15) %>%
mutate(ybirth_child = na_if(ybirth_child, 9998)) %>% # 9998 is recoded to missing
select(MID, KID, ybirth_child) %>%
merge(D) %>%
mutate(age = year - ybirth_child) %>%
left_join(assessment_panel) %>%
left_join(cds_expenditures) %>%
left_join(cds_childcare) %>%
left_join(TD_agg) %>%
left_join(PPE) %>%
left_join(age_month)%>%
left_join(staff_ratio_panel)%>%#staff to child ratios by age of child in months
left_join(relative_present)%>%
filter(year>=1997,year<=2007)
#Add mobility data and 1988 distance to parents
child_panel <- child_panel %>%
left_join(head_grow_up) %>%
left_join(mother_distance) %>%
left_join(father_distance)
child_panel <- child_panel %>% #generate HH weekly childcare expenditure per child
mutate(chcare_hh=childcare_exp/52,
chcare_hh_pc=chcare_hh/num_0_12,
#generate total goods expenditures
hhinvest = SchSupplies + sports + lessons + comm_grps + tutoring + Toys)
# Diagnostics: inspect the result and compare the total row count with the
# number of distinct key combinations.
head(child_panel)
head(head_grow_up)
nrow(head_grow_up)
nrow(child_panel)
child_panel %>% select(KID,year) %>% unique() %>% nrow()
child_panel %>% select(KID,MID,FID,year) %>% unique() %>% nrow()
child_panel %>% select(KID,MID,year) %>% unique() %>% nrow()
# Re-run of the child-panel pipeline without the later mobility/distance
# joins, to check the row count at this stage.
# NOTE(review): still merges `D`, which is not defined above this point in
# the visible script - presumably superseded by the `mother_panel` version
# further down; confirm.
child_panel <- KID %>%
inner_join(C) %>%
group_by(KID) %>%
filter(sum(CAH2 == 2) == 0) %>% # drop children with adoption records
rename(ybirth_child = CAH15) %>%
mutate(ybirth_child = na_if(ybirth_child, 9998)) %>%
select(MID, KID, ybirth_child) %>%
merge(D) %>%
mutate(age = year - ybirth_child) %>%
left_join(assessment_panel) %>%
left_join(cds_expenditures) %>%
left_join(cds_childcare) %>%
left_join(TD_agg) %>%
left_join(PPE) %>%
left_join(age_month)%>%
left_join(staff_ratio_panel)%>%#staff to child ratios by age of child in months
left_join(relative_present)%>%
filter(year>=1997,year<=2007)
nrow(child_panel)
# Same pipeline cut off right after the column selection, to check the
# row count before any merge.
child_panel <- KID %>%
inner_join(C) %>%
group_by(KID) %>%
filter(sum(CAH2 == 2) == 0) %>% # drop children with adoption records
rename(ybirth_child = CAH15) %>%
mutate(ybirth_child = na_if(ybirth_child, 9998)) %>%
select(MID, KID, ybirth_child) #%>%
nrow(child_panel)
# Child-level panel: one record per child and mother-panel year, with the
# CDS assessment, expenditure, childcare and time-use files attached.
child_panel <- KID %>%
  inner_join(C) %>%
  group_by(KID) %>%
  filter(sum(CAH2 == 2) == 0) %>%                      # drop children with adoption records
  rename(ybirth_child = CAH15) %>%
  mutate(ybirth_child = na_if(ybirth_child, 9998)) %>% # 9998 is recoded to missing
  select(MID, KID, ybirth_child) %>%
  merge(mother_panel) %>%
  mutate(age = year - ybirth_child) %>%
  left_join(assessment_panel) %>%
  left_join(cds_expenditures) %>%
  left_join(cds_childcare) %>%
  left_join(TD_agg) %>%
  left_join(PPE) %>%
  left_join(age_month) %>%
  left_join(staff_ratio_panel) %>%                     # staff to child ratios by age of child in months
  left_join(relative_present) %>%
  filter(year >= 1997, year <= 2007)

# Row count versus number of distinct (KID, MID, year) combinations.
nrow(child_panel)
child_panel %>%
  select(KID, MID, year) %>%
  unique() %>%
  nrow()

# Add mobility data and 1988 distance to parents.
child_panel <- child_panel %>%
  left_join(head_grow_up) %>%
  left_join(mother_distance) %>%
  left_join(father_distance)
nrow(child_panel)

child_panel <- child_panel %>%
  mutate(
    # household weekly childcare expenditure, total and per child under 13
    chcare_hh = childcare_exp / 52,
    chcare_hh_pc = chcare_hh / num_0_12,
    # total goods expenditures
    hhinvest = SchSupplies + sports + lessons + comm_grps + tutoring + Toys,
    # mother's age at the birth of this child
    momageatbirth = m_age - age
  ) %>%
  # Sample restriction: only households with children, child aged 0-18;
  # everything else is flagged as out of sample.
  mutate(
    ind_not_sample = case_when(
      (num_child > 0 & age >= 0 & age <= 18) ~ ind_not_sample,
      TRUE ~ 1
    )
  ) %>%
  # Deflate childcare and goods expenditures by CPIU.
  mutate(
    chcare = chcare / CPIU,
    chcare_second = chcare_second / CPIU,
    chcare_hh_pc = chcare_hh_pc / CPIU,
    chcare_hh = chcare_hh / CPIU,
    hhinvest = hhinvest / CPIU
  )
# Interactive verification: compare the freshly built child panel with a
# previously saved copy. The `~/Dropbox/...` paths are specific to one
# machine.
# NOTE(review): this overwrites nothing itself, but `D2`, `d1`-`d4` and the
# working directory are all changed below - presumably session residue that
# is not meant to run as part of the pipeline; confirm.
D2 <- read.csv("~/Dropbox/PSID_CDS/data-derived/ChildPanelCDS.csv") %>% mutate(X=NULL)
nrow(D2)
nrow(child_panel)
length(names(D2))
length(names(child_panel))
names(D2)
D2$X.1 = NULL
length(names(D2))
all.equal(child_panel,D2)
D2$m_ed
child_panel$m_ed
#ok
#identical
# Ratio of Q1H19 to Q1H18 averaged over records with Q1H18 > 0.
hours_per_day <- Ch97 %>% filter(Q1H18>0) %>% summarise(mean(Q1H19/Q1H18,na.rm = TRUE))
hours_per_day[1,1]
hours_per_day
# Extract the R code from the Quarto source; the second call relies on the
# working directory set by setwd().
knitr::purl("~/Dropbox/JPE_Child_Devp_2020/FINAL_Replication_CLMP_JPE/code/data_cleaning/psid/clean_psid_data.qmd")
setwd("~/Dropbox/JPE_Child_Devp_2020/FINAL_Replication_CLMP_JPE/code/data_cleaning/psid")
knitr::purl("clean_psid_data.qmd")
# Compare row counts of the saved child panels (CSV and Stata, both locations).
d1 <- read.csv("../../../data/data_derived/ChildPanelCDS.csv")
d2 <- read.csv("~/Dropbox/PSID_CDS/data-derived/ChildPanelCDS.csv")
nrow(d1)
nrow(d2)
d3 <- read.dta("~/Dropbox/PSID_CDS/data-derived/ChildPanelCDS.dta")
nrow(d3)
d4 <- read.dta("../../../data/data_derived/ChildPanelCDS.dta")
nrow(d4)
child_panel
nrow(child_panel)
# Save the child panel in Stata format.
write.dta(child_panel, "../../../data/data_derived/ChildPanelCDS.dta")
# Inspect how m_ed is stored in each saved copy.
d1$m_ed
d2$m_ed
d3$m_ed
d1 <- read.dta("../../../data/data_derived/MotherPanelCDS.dta")
d1$m_ed
d2 <- read.csv("../../../data/data_derived/MotherPanelCDS.csv")
d2$m_ed
d1$m_ed
# Build the education crosswalk: one row per individual (ID) with an
# education band derived from the most recent valid grades-completed report.
D <- readxl::read_excel("../../../data/data_PSID/main/education/grades_completed.xlsx") %>%
  mutate(ID = ER30001 * 1000 + ER30002)

# Every fourth column starting at column 5 (positions 5, 9, ..., 157).
# NOTE(review): presumably one grades-completed variable per survey wave,
# in the same order as `years` below - confirm against the spreadsheet.
D1 <- D[, seq(5, 158, 4)]

# This routine pulls out the most recent education variable, up to the year
# 2011. The data themselves go until 2017, but we only go up to 2011 to
# replicate the data used in the first draft of CLMP.
D$educ <- -1
D$year_meas <- -1
years <- c(1968, seq(1970, 1996), seq(1997, 2017, 2))

# years[36] is 2011. Replace with seq_along(years) to update using the
# measures from 2013-2017. Later waves overwrite earlier ones, so the last
# valid report (strictly between 0 and 98) wins.
for (i in 1:36) {
  print(i)
  I_use <- (D1[, i] > 0) & (D1[, i] < 98)
  D[I_use, "educ"] <- D1[I_use, i]
  D[I_use, "year_meas"] <- years[i]
}

# Do some final data cleaning.
education <- D %>%
  mutate(
    educ = na_if(educ, -1), # -1 means no valid report was found
    # Superseded numeric coding. It is kept as a comment: leaving it active
    # next to the line below (with no comma between them) is a syntax error.
    # ed = case_when(educ > 16 ~ 5, educ == 16 ~ 4, educ >= 13 ~ 3, educ >= 12 ~ 2, educ < 12 ~ 1),
    ed = case_when(
      educ > 16 ~ ">16",
      educ == 16 ~ "16",
      educ >= 13 ~ "13-15",
      educ >= 12 ~ "12",
      educ < 12 ~ "<12"
    )
  ) %>%
  select(ID, educ, ed, year_meas) %>%
  # mutate(ed = factor(ed, levels = c(1, 2, 3, 4, 5), labels = c("<12", "12", "13-15", "16", ">16"))) %>%
  select(ID, ed) # just use this measure for now
# Education crosswalk: one row per individual (ID) with an education band
# taken from the most recent valid grades-completed report.
D <- readxl::read_excel("../../../data/data_PSID/main/education/grades_completed.xlsx") %>%
  mutate(ID = ER30001 * 1000 + ER30002)

# Columns at positions 5, 9, ..., 157 of the spreadsheet.
D1 <- D[, seq(5, 158, 4)]

# Pull out the most recent education variable, up to the year 2011. The data
# go until 2017, but we stop at 2011 to replicate the data used in the first
# draft of CLMP.
D$educ <- -1
D$year_meas <- -1
years <- c(1968, seq(1970, 1996), seq(1997, 2017, 2))

# Loop over the first 36 entries of `years`; use seq_along(years) instead to
# update with the measures from 2013-2017.
for (i in 1:36) {
  print(i)
  I_use <- (D1[, i] > 0) & (D1[, i] < 98)
  D[I_use, "educ"] <- D1[I_use, i]
  D[I_use, "year_meas"] <- years[i]
}

# Final cleaning: recode the -1 placeholder to missing and band the years
# of education.
education <- D %>%
  mutate(
    educ = na_if(educ, -1),
    # ed = case_when(educ > 16 ~ 5, educ == 16 ~ 4, educ >= 13 ~ 3, educ >= 12 ~ 2, educ < 12 ~ 1)
    ed = case_when(
      educ > 16 ~ ">16",
      educ == 16 ~ "16",
      educ >= 13 ~ "13-15",
      educ >= 12 ~ "12",
      educ < 12 ~ "<12"
    )
  ) %>%
  select(ID, educ, ed, year_meas) %>%
  # mutate(ed = factor(ed, levels = c(1, 2, 3, 4, 5), labels = c("<12", "12", "13-15", "16", ">16"))) %>%
  select(ID, ed) # just use this measure for now
# Regenerate the R script from the Quarto source (path is relative to the
# current working directory).
knitr::purl("clean_psid_data.qmd")
# Read back both saved copies of the mother panel and compare them.
d1 <- read.dta("../../../data/data_derived/MotherPanelCDS.dta")
d2 <- read.csv("../../../data/data_derived/MotherPanelCDS.csv")
d1$m_ed
nrow(d1)
nrow(d2)
# Run the generated cleaning script end to end; the path is specific to one
# machine.
source("~/Dropbox/JPE_Child_Devp_2020/FINAL_Replication_CLMP_JPE/code/data_cleaning/psid/clean_psid_data.R")
